Data input

if (!dir.exists("Simulation_studies")) dir.create("Simulation_studies")
if (!dir.exists("Simulation_studies/Batch_size")) dir.create("Simulation_studies/Batch_size")
if (!dir.exists("Simulation_studies/Pop_heterogeneity")) dir.create("Simulation_studies/Pop_heterogeneity")

RUN_ALL <- FALSE # flag whether or not to re-run fitting

# source code for model fitting
source("Model_calibration_code/simulate_clinical_recurrences.R")
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
source("Model_calibration_code/Metropolis_Hastings_fit.R")

# parameters under which data are simulated
PARAM_TRUTH_HET <- expand.grid(LAMBDA_MEAN_TRUTH=c(0.25/365, 0.5/365),
                               NU_TRUTH=6, 
                               ETA_TRUTH=c(1/400, 1/200, 1/100),
                               KAPPA_TRUTH=c(0.245, 1.15, 9.1, Inf))

PARAM_TRUTH_HET_SUPP <- expand.grid(LAMBDA_MEAN_TRUTH=c(0.25/365, 0.5/365),
                                    NU_TRUTH=6, 
                                    ETA_TRUTH=c(1/400, 1/200, 1/100),
                                    KAPPA_TRUTH=2.485)

PARAM_TRUTH_HET <- rbind(PARAM_TRUTH_HET, PARAM_TRUTH_HET_SUPP)

PARAM_TRUTH_BATCH <- expand.grid(LAMBDA_MEAN_TRUTH=c(0.5/365),
                                 NU_TRUTH=c(4, 6, 8), 
                                 ETA_TRUTH=c(1/400, 1/200, 1/100),
                                 R_TRUTH=c(0.25, 0.5, 1, 2))

PREL <- 0.4

N_OBS <- 65
TIME_STEP <- 10
PROPHYLAXIS_WINDOW <- 1
BUNCHING_WINDOW <- 2

N_AGES <- rep((2:15)*(365%/%TIME_STEP), each=80)
PCLIN <- rep(1, length(N_AGES))
SEASONALITY <- rep(1, N_OBS+max(N_AGES))

MAX_INF <- 12


# parameters for Metropolis-Hastings proposal distribution
LAMBDA_PROP_SD <- 0.02/365
NU_PROP_SD <- 0.2
ETA_PROP_SD <- 1/2000

# MCMC parametrers
N_CHAINS <- 4
N_ITER <- 32000
N_CORES_PER_CHAIN <- 3
BURNIN_PROP <- 0.5

Population heterogeneity in force of inoculation

We simulate data under a Gamma-distributed force of inoculation to accommodate population heterogeneity; but perform inference under a model which assumes a homogeneous force of inoculation.

simulate_cohort_het <- function(LAMBDA_MEAN, KAPPA, SEASONALITY, TIME_STEP, N_AGES, N_OBS, NU, R, ETA, PREL, U, SHIFT_AGE) {
  t(sapply(N_AGES, function(N_AGE) {
    LAMBDA_SCALE <- ifelse(is.infinite(KAPPA), LAMBDA_MEAN, rgamma(1, shape=KAPPA, scale=LAMBDA_MEAN/KAPPA))
    LAMBDA <- LAMBDA_SCALE*SEASONALITY
    LAMBDA_ADJUSTED <- c(head(LAMBDA, -(N_OBS+N_AGE-SHIFT_AGE))*U, tail(LAMBDA, N_OBS+N_AGE-SHIFT_AGE))
    inf <- simulate_patient(LAMBDA_ADJUSTED, TIME_STEP, N_AGE, N_OBS, NU, R, ETA, PREL) - N_AGE*TIME_STEP
    inf <- (inf[inf>=0 & inf<=N_OBS*TIME_STEP])%/%TIME_STEP+1
    y <- rep(0, N_OBS); y[inf] <- 1; return(y)}))
}

simulate_dataset_het <- function(LAMBDA_MEAN, KAPPA, SEASONALITY, TIME_STEP, N_AGES, N_OBS, NU, R, ETA, PREL, 
                                 PROPHYLAXIS_WINDOW, BUNCHING_WINDOW, PCLIN, MASKING_MATRIX, U, SHIFT_AGE) {
  
  all_inf <- simulate_cohort_het(LAMBDA_MEAN, KAPPA, SEASONALITY, TIME_STEP, N_AGES, N_OBS, NU, R, ETA, PREL, U, SHIFT_AGE)
  inf_matrix <- simulate_masking(all_inf, PCLIN, PROPHYLAXIS_WINDOW, BUNCHING_WINDOW, MASKING_MATRIX)
  
  return(inf_matrix)
}
set.seed("08042024")

pop_het <- list()

for (k in 1:nrow(PARAM_TRUTH_HET)) {
  if (RUN_ALL | !file.exists(paste0("Simulation_studies/Pop_heterogeneity/pop_het_", k, ".rds"))) {
    
    # simulate recurrences using direct stochastic simulation
    inf_matrix <- simulate_cohort_het(PARAM_TRUTH_HET[k, "LAMBDA_MEAN_TRUTH"],
                                      PARAM_TRUTH_HET[k, "KAPPA_TRUTH"], 
                                      SEASONALITY, TIME_STEP, N_AGES, N_OBS, 
                                      PARAM_TRUTH_HET[k, "NU_TRUTH"], 1,
                                      PARAM_TRUTH_HET[k, "ETA_TRUTH"], PREL, U=1, SHIFT_AGE=1)
    
    # retain up to MAX_INF infections per individual
    for (i in 1:nrow(inf_matrix)) {
      y <- cumsum(inf_matrix[i,])
      inf_matrix[i, y>MAX_INF] <- NA
    }
    
    # generate a ternary infection matrix adjusted for post-exposure prophylaxis
    ternary_matrix <- matrix("M", nrow=nrow(inf_matrix), ncol=ncol(inf_matrix))
    ternary_matrix[inf_matrix==0] <- "N"
    ternary_matrix[inf_matrix==1] <- "C"
  
    for (i in 1:nrow(inf_matrix)) {
      inf_times <- which(inf_matrix[i,]==1)
      masked_times <- which(is.na(inf_matrix[i,]))
    
      if (PROPHYLAXIS_WINDOW>0) {
        for (j in 1:ncol(inf_matrix)) {
          if (!is.na(inf_matrix[i, j]) & inf_matrix[i, j]==1) {
            # period of complete prophylactic protection
            inf_matrix[i,  subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- NA
            ternary_matrix[i,  subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- "M"
          
            # prophylactic bunching period
            check <- any(inf_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW), 
                                             j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))]==1)
            check <- ifelse(is.na(check), FALSE, check)
            if (check) {
              inf_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW, 
                                    j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW<=ncol(inf_matrix))] <- 1
              inf_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1)), 
                                    j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1))<=ncol(inf_matrix))] <- 0
              ternary_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW), 
                                        j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))] <- "B"
            }
          }
        }
      }
      ternary_matrix[i, masked_times] <- "M" 
    }
  
    # generate S vectors
    S_vectors <- generate_S_vectors(split(ternary_matrix, 1:nrow(ternary_matrix)))
    
    # https://stackoverflow.com/questions/8413188/can-i-nest-parallelparlapply
    # parallelise over chains 
    cl <- makeCluster(N_CHAINS)
  
    # export objects from local function environment
    clusterExport(cl, c('S_vectors', 'N_AGES', 'PREL', 'N_OBS', 'TIME_STEP',
                        'SEASONALITY', 'N_ITER', 'N_CORES_PER_CHAIN',
                        'LAMBDA_PROP_SD', 'NU_PROP_SD', 'ETA_PROP_SD'),
                  envir=environment())
  
    MCMC_results <- parLapply(cl, 1:length(cl), function(CHAIN) {
    
      source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
    
      param_vals <- data.frame(matrix(ncol = 6, nrow = N_ITER))
      colnames(param_vals) <- c("CHAIN", "ITER", "LAMBDA", "NU", "ETA", "LOG_LIK")
      param_vals[, "CHAIN"] <- CHAIN
      param_vals[, "ITER"] <- 1:N_ITER
    
      # initialise parameter values
      param_vals[1, "LAMBDA"] <-  runif(1, 0.1/365, 1/365)
      param_vals[1, "NU"] <-  runif(1, 0.5, 8)
      param_vals[1, "ETA"] <- runif(1, 1/500, 1/50)
      param_vals[1, "LOG_LIK"] <- 
        cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES, 
                          1, SEASONALITY*param_vals[1, "LAMBDA"],
                          param_vals[1, "ETA"], param_vals[1, "NU"], PREL, 
                          N_OBS, TIME_STEP, 1, 1, N_CORES_PER_CHAIN)
    
      for (i in 2:N_ITER) {
        # sample parameter values from proposal distrbution
        lambda_prop <- rnorm(1, param_vals[i-1, "LAMBDA"], LAMBDA_PROP_SD)
        if (lambda_prop<0) lambda_prop <- param_vals[i-1, "LAMBDA"]
      
        nu_prop <- rnorm(1, param_vals[i-1, "NU"], NU_PROP_SD)
        if (nu_prop<0) nu_prop <- param_vals[i-1, "NU"]
      
        eta_prop <- rnorm(1, param_vals[i-1, "ETA"], ETA_PROP_SD)
        if (eta_prop<0) eta_prop <- param_vals[i-1, "ETA"]
      
        # calculate likelihood ratio
        likelihood_prop <- 
          cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES, 
                            1, SEASONALITY*lambda_prop,
                            eta_prop, nu_prop, PREL, N_OBS, TIME_STEP, 
                            1, 1, N_CORES_PER_CHAIN)
      
        likelihood_ratio <- exp(likelihood_prop-param_vals[i-1, "LOG_LIK"])
      
        # accept or reject
        if (runif(1)<=likelihood_ratio) {
          param_vals[i, "LAMBDA"] <- lambda_prop
          param_vals[i, "NU"] <- nu_prop
          param_vals[i, "ETA"] <- eta_prop
          param_vals[i, "LOG_LIK"] <- likelihood_prop
        } else {
          param_vals[i, 3:6] <- param_vals[i-1, 3:6]
        }
      }
    
      return(param_vals)
    
    })
  
    collated_results <- list(MCMC_results=bind_rows(MCMC_results),
                             LAMBDA_MEAN_TRUTH=PARAM_TRUTH_HET[k, "LAMBDA_MEAN_TRUTH"], 
                             NU_TRUTH=PARAM_TRUTH_HET[k, "NU_TRUTH"],
                             KAPPA_TRUTH=PARAM_TRUTH_HET[k, "KAPPA_TRUTH"], 
                             ETA_TRUTH=PARAM_TRUTH_HET[k, "ETA_TRUTH"], 
                             ternary_matrix=ternary_matrix)
  
    write_rds(collated_results, 
              paste0("Simulation_studies/Pop_heterogeneity/pop_het_", k, ".rds"),
              compress="gz")
  } else {
    collated_results <- read_rds(paste0("Simulation_studies/Pop_heterogeneity/pop_het_", k, ".rds"))
  }
  
  pop_het[[k]] <- collated_results
}

## $`000684931506849315`
## NULL
## 
## $`00136986301369863`
## NULL
## $`000684931506849315`
## NULL
## 
## $`00136986301369863`
## NULL

Based on the incidence of symptomatic falciparum malaria (adjusted for treatment failure and left/right censoring), we estimate the degree of transmission heterogeneity in the SPf66 vaccine trial. We pool data across age groups, given there is appears to be no statistically meaningful age structure in the incidence of symptomatic falciparum malaria.

incidence_by_indiv <- read_rds("Spf66_data_processed/incidence_by_individual.rds")

N_ITER_HET <- 8000
LOG_KAPPA_PRIOR_MEAN=0.5
LOG_KAPPA_PRIOR_SD=1
LOG_KAPPA_PROP_SD=0.1

proportion_bites_top_20 <- function(k) {
  0.2+dgamma(qgamma(0.8, shape=k, scale=1/k), shape=k+1, scale=1/k)/k
}

negbin_lhood <- function(lambda, kappa) {
  sum(mapply(function(N, t) dnbinom(N, size=kappa, mu=lambda*t, log=TRUE),
             incidence_by_indiv$N_Pf, incidence_by_indiv$Followup_Pf))
}

set.seed("01052024")

if (RUN_ALL | !file.exists("Simulation_studies/Pop_heterogeneity/SPf66_Pf_het_fits.rds")) {
  
  Pf_het_fit <- data.frame(matrix(ncol = 4, nrow = 0))
  colnames(Pf_het_fit) <- c("CHAIN", "ITER", "LAMBDA", "LOG_KAPPA")

  for (chain in 1:N_CHAINS) {
    # initialise 
    param_vals <- data.frame(matrix(ncol = 3, nrow = N_ITER_HET))
    colnames(param_vals) <- c("LAMBDA", "LOG_KAPPA", "LOG_LIK")
  
    param_vals[1, "LAMBDA"] <-  runif(1, 0.2, 4)
    param_vals[1, "LOG_KAPPA"] <- rnorm(1, LOG_KAPPA_PRIOR_MEAN, LOG_KAPPA_PRIOR_SD)
    param_vals[1, "LOG_LIK"] <- negbin_lhood(param_vals[1, "LAMBDA"], exp(param_vals[1, "LOG_KAPPA"]))
  
    for (i in 2:N_ITER_HET) {
      # sample parameter values from proposal distrbution
      lambda_prop <- rnorm(1, param_vals[i-1, "LAMBDA"], LAMBDA_PROP_SD*365)
      if (lambda_prop<0) lambda_prop <- param_vals[i-1, "LAMBDA"]
    
      kappa_prop <- rnorm(1, param_vals[i-1, "LOG_KAPPA"], LOG_KAPPA_PROP_SD)
    
      # calculate likelihood ratio
      likelihood_prop <- negbin_lhood(lambda_prop, exp(kappa_prop))
    
      likelihood_ratio <- exp(likelihood_prop-param_vals[i-1, "LOG_LIK"])*
        dnorm(kappa_prop, LOG_KAPPA_PRIOR_MEAN, LOG_KAPPA_PRIOR_SD)/
        dnorm(param_vals[i-1, "LOG_KAPPA"], LOG_KAPPA_PRIOR_MEAN, LOG_KAPPA_PRIOR_SD)

      # accept or reject
      if (runif(1)<=likelihood_ratio) {
        param_vals[i, "LAMBDA"] <- lambda_prop
        param_vals[i, "LOG_KAPPA"] <- kappa_prop
        param_vals[i, "LOG_LIK"] <- likelihood_prop
      } else {
        param_vals[i, ] <- param_vals[i-1,]
      }
    }
    Pf_het_fit <- bind_rows(Pf_het_fit, bind_cols(ITER=1:N_ITER_HET, CHAIN=chain, param_vals))
  }
  
  write_rds(Pf_het_fit, 
            "Simulation_studies/Pop_heterogeneity/SPf66_Pf_het_fits.rds",
            compress="gz")
} else {
  Pf_het_fit <- read_rds("Simulation_studies/Pop_heterogeneity/SPf66_Pf_het_fits.rds")
}

Misspecification of sporozoite batch size

The assumption of geometrically-distributed batch sizes may be misspecified. We therefore perform a simulation study, whereby data are simulated under negative binomial sporozoite batch sizes (which may be under- or over-dispersed relative to the geometric distribution), but inference is performed under a misspecified framework predicated on geometric batch sizes.

set.seed("11122023")

batch_sensitivity <- list()

for (k in 1:nrow(PARAM_TRUTH_BATCH)) {
  if (RUN_ALL | !file.exists(paste0("Simulation_studies/Batch_size/batch_sensitivity_", k, ".rds"))) {
    
    # simulate recurrences using direct stochastic simulation
    inf_matrix <- simulate_cohort(SEASONALITY*PARAM_TRUTH_BATCH[k, "LAMBDA_MEAN_TRUTH"],
                                  TIME_STEP, N_AGES, N_OBS, 
                                  PARAM_TRUTH_BATCH[k, "NU_TRUTH"], PARAM_TRUTH_BATCH[k, "R_TRUTH"],
                                  PARAM_TRUTH_BATCH[k, "ETA_TRUTH"], PREL, U=1, SHIFT_AGE=1)
    
    # retain up to MAX_INF infections per individual
    for (i in 1:nrow(inf_matrix)) {
      y <- cumsum(inf_matrix[i,])
      inf_matrix[i, y>MAX_INF] <- NA
    }
    
    # generate a ternary infection matrix adjusted for post-exposure prophylaxis
    ternary_matrix <- matrix("M", nrow=nrow(inf_matrix), ncol=ncol(inf_matrix))
    ternary_matrix[inf_matrix==0] <- "N"
    ternary_matrix[inf_matrix==1] <- "C"
  
    for (i in 1:nrow(inf_matrix)) {
      inf_times <- which(inf_matrix[i,]==1)
      masked_times <- which(is.na(inf_matrix[i,]))
    
      if (PROPHYLAXIS_WINDOW>0) {
        for (j in 1:ncol(inf_matrix)) {
          if (!is.na(inf_matrix[i, j]) & inf_matrix[i, j]==1) {
            # period of complete prophylactic protection
            inf_matrix[i,  subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- NA
            ternary_matrix[i,  subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- "M"
          
            # prophylactic bunching period
            check <- any(inf_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW), 
                                             j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))]==1)
            check <- ifelse(is.na(check), FALSE, check)
            if (check) {
              inf_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW, 
                                    j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW<=ncol(inf_matrix))] <- 1
              inf_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1)), 
                                    j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1))<=ncol(inf_matrix))] <- 0
              ternary_matrix[i,  subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW), 
                                        j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))] <- "B"
            }
          }
        }
      }
      ternary_matrix[i, masked_times] <- "M" 
    }
  
    # generate S vectors
    S_vectors <- generate_S_vectors(split(ternary_matrix, 1:nrow(ternary_matrix)))
    
    # https://stackoverflow.com/questions/8413188/can-i-nest-parallelparlapply
    # parallelise over chains 
    cl <- makeCluster(N_CHAINS)
  
    # export objects from local function environment
    clusterExport(cl, c('S_vectors', 'N_AGES', 'PREL', 'N_OBS', 'TIME_STEP',
                        'SEASONALITY', 'N_ITER', 'N_CORES_PER_CHAIN',
                        'LAMBDA_PROP_SD', 'NU_PROP_SD', 'ETA_PROP_SD'),
                  envir=environment())
  
    MCMC_results <- parLapply(cl, 1:length(cl), function(CHAIN) {
    
      source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
    
      param_vals <- data.frame(matrix(ncol = 6, nrow = N_ITER))
      colnames(param_vals) <- c("CHAIN", "ITER", "LAMBDA", "NU", "ETA", "LOG_LIK")
      param_vals[, "CHAIN"] <- CHAIN
      param_vals[, "ITER"] <- 1:N_ITER
    
      # initialise parameter values
      param_vals[1, "LAMBDA"] <-  runif(1, 0.1/365, 1/365)
      param_vals[1, "NU"] <-  runif(1, 0.5, 8)
      param_vals[1, "ETA"] <- runif(1, 1/500, 1/50)
      param_vals[1, "LOG_LIK"] <- 
        cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES, 
                          1, SEASONALITY*param_vals[1, "LAMBDA"],
                          param_vals[1, "ETA"], param_vals[1, "NU"], PREL, 
                          N_OBS, TIME_STEP, 1, 1, N_CORES_PER_CHAIN)
    
      for (i in 2:N_ITER) {
        # sample parameter values from proposal distrbution
        lambda_prop <- rnorm(1, param_vals[i-1, "LAMBDA"], LAMBDA_PROP_SD)
        if (lambda_prop<0) lambda_prop <- param_vals[i-1, "LAMBDA"]
      
        nu_prop <- rnorm(1, param_vals[i-1, "NU"], NU_PROP_SD)
        if (nu_prop<0) nu_prop <- param_vals[i-1, "NU"]
      
        eta_prop <- rnorm(1, param_vals[i-1, "ETA"], ETA_PROP_SD)
        if (eta_prop<0) eta_prop <- param_vals[i-1, "ETA"]
      
        # calculate likelihood ratio
        likelihood_prop <- 
          cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES, 
                            1, SEASONALITY*lambda_prop,
                            eta_prop, nu_prop, PREL, N_OBS, TIME_STEP, 
                            1, 1, N_CORES_PER_CHAIN)
      
        likelihood_ratio <- exp(likelihood_prop-param_vals[i-1, "LOG_LIK"])
      
        # accept or reject
        if (runif(1)<=likelihood_ratio) {
          param_vals[i, "LAMBDA"] <- lambda_prop
          param_vals[i, "NU"] <- nu_prop
          param_vals[i, "ETA"] <- eta_prop
          param_vals[i, "LOG_LIK"] <- likelihood_prop
        } else {
          param_vals[i, 3:6] <- param_vals[i-1, 3:6]
        }
      }
    
      return(param_vals)
    
    })
  
    collated_results <- list(MCMC_results=bind_rows(MCMC_results),
                            LAMBDA_TRUTH=PARAM_TRUTH_BATCH[k, "LAMBDA_MEAN_TRUTH"], 
                            NU_TRUTH=PARAM_TRUTH_BATCH[k, "NU_TRUTH"],
                            R_TRUTH=PARAM_TRUTH_BATCH[k, "R_TRUTH"], 
                            ETA_TRUTH=PARAM_TRUTH_BATCH[k, "ETA_TRUTH"], 
                            ternary_matrix=ternary_matrix)
  
    write_rds(collated_results, 
              paste0("Simulation_studies/Batch_size/batch_sensitivity_", k, ".rds"),
              compress = "gz")
  } else {
    collated_results <- read_rds(paste0("Simulation_studies/Batch_size/batch_sensitivity_", k, ".rds"))
  }
  
  batch_sensitivity[[k]] <- collated_results
}

## $`4`
## NULL
## 
## $`6`
## NULL
## 
## $`8`
## NULL
## $`4`
## NULL
## 
## $`6`
## NULL
## 
## $`8`
## NULL